Data Visualization

A Practical Introduction

By Kieran Healy

library(gapminder)
library(tidyverse)
library(ggplot2)
library(socviz)
library(ggrepel)
library(scales)
library(dplyr)
# Preview the first six rows of the asasec data set.
head(asasec, n = 6L)
##                                Section         Sname Beginning Revenues
## 1      Aging and the Life Course (018)         Aging     12752    12104
## 2     Alcohol, Drugs and Tobacco (030) Alcohol/Drugs     11933     1144
## 3 Altruism and Social Solidarity (047)      Altruism      1139     1862
## 4            Animals and Society (042)       Animals       473      820
## 5             Asia/Asian America (024)          Asia      9056     2116
## 6            Body and Embodiment (048)          Body      3408     1618
##   Expenses Ending Journal Year Members
## 1    12007  12849      No 2005     598
## 2      400  12677      No 2005     301
## 3     1875   1126      No 2005      NA
## 4     1116    177      No 2005     209
## 5     1710   9462      No 2005     365
## 6     1920   3106      No 2005      NA
# Base plot for the 2014 rows of asasec: Members on x, Revenues on y, with
# the short section name (Sname) supplied as the label aesthetic.
p <- ggplot(
  data = subset(asasec, Year == 2014),
  mapping = aes(x = Members, y = Revenues, label = Sname)
)

# Scatterplot plus a smoother fitted with geom_smooth()'s default method.
p +
  geom_point() +
  geom_smooth()

# Same 2014 base plot: Members against Revenues, labelled by Sname.
p <- ggplot(
  data = subset(asasec, Year == 2014),
  mapping = aes(x = Members, y = Revenues, label = Sname)
)

# Points are colored by the Journal column; the smoother is a linear fit.
p +
  geom_point(mapping = aes(color = Journal)) +
  geom_smooth(method = "lm")

# Base layer: 2014 rows of asasec, Members against Revenues, labelled by Sname.
p0 <- ggplot(
  data = subset(asasec, Year == 2014),
  mapping = aes(x = Members, y = Revenues, label = Sname)
)

# The linear fit (no confidence band, light gray) is added before the points,
# so the points, colored by Journal, are layered on top of it.
p1 <- p0 +
  geom_smooth(method = "lm", se = FALSE, color = "gray80") +
  geom_point(mapping = aes(color = Journal))

# Text labels are drawn only for 2014 rows with Revenues above 7000.
p2 <- p1 +
  geom_text_repel(
    data = subset(asasec, Year == 2014 & Revenues > 7000),
    size = 2
  )
p2

# Axis, legend, title, subtitle and caption text.
p3 <- p2 +
  labs(
    x = "Membership",
    y = "Revenues",
    color = "Section has own Journal",
    title = "ASA Sections",
    subtitle = "2014 Calendar year.",
    caption = "Source: ASA annual report."
  )

# Dollar-formatted y axis labels; legend placed below the panel.
p4 <- p3 +
  scale_y_continuous(labels = scales::dollar) +
  theme(legend.position = "bottom")

p4

Question: What is a Likert scale?

# Scatterplot of organdata: roads on x, donors on y, colored by world.
p <- ggplot(
  data = organdata,
  mapping = aes(x = roads, y = donors, color = world)
)

# Larger points, the ColorBrewer "Set2" palette, and the legend on top.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = "top")

# Run R's built-in "colors" demo, which displays the named colors.
demo("colors")
## 
## 
##  demo(colors)
##  ---- ~~~~~~
## 
## > ### ----------- Show (almost) all named colors ---------------------
## > 
## > ## 1) with traditional 'graphics' package:
## > showCols1 <- function(bg = "gray", cex = 0.75, srt = 30) {
## +     m <- ceiling(sqrt(n <- length(cl <- colors())))
## +     length(cl) <- m*m; cm <- matrix(cl, m)
## +     ##
## +     require("graphics")
## +     op <- par(mar=rep(0,4), ann=FALSE, bg = bg); on.exit(par(op))
## +     plot(1:m,1:m, type="n", axes=FALSE)
## +     text(col(cm), rev(row(cm)), cm,  col = cl, cex=cex, srt=srt)
## + }
## 
## > showCols1()

## 
## > ## 2) with 'grid' package:
## > showCols2 <- function(bg = "grey", cex = 0.75, rot = 30) {
## +     m <- ceiling(sqrt(n <- length(cl <- colors())))
## +     length(cl) <- m*m; cm <- matrix(cl, m)
## +     ##
## +     require("grid")
## +     grid.newpage(); vp <- viewport(width = .92, height = .92)
## +     grid.rect(gp=gpar(fill=bg))
## +     grid.text(cm, x = col(cm)/m, y = rev(row(cm))/m, rot = rot,
## +               vp=vp, gp=gpar(cex = cex, col = cm))
## + }
## 
## > showCols2()

## 
## > showCols2(bg = "gray33")

## 
## > ###
## > 
## > ##' @title Comparing Colors
## > ##' @param col
## > ##' @param nrow
## > ##' @param ncol
## > ##' @param txt.col
## > ##' @return the grid layout, invisibly
## > ##' @author Marius Hofert, originally
## > plotCol <- function(col, nrow=1, ncol=ceiling(length(col) / nrow),
## +                     txt.col="black") {
## +     stopifnot(nrow >= 1, ncol >= 1)
## +     if(length(col) > nrow*ncol)
## +         warning("some colors will not be shown")
## +     require(grid)
## +     grid.newpage()
## +     gl <- grid.layout(nrow, ncol)
## +     pushViewport(viewport(layout=gl))
## +     ic <- 1
## +     for(i in 1:nrow) {
## +         for(j in 1:ncol) {
## +             pushViewport(viewport(layout.pos.row=i, layout.pos.col=j))
## +             grid.rect(gp= gpar(fill=col[ic]))
## +             grid.text(col[ic], gp=gpar(col=txt.col))
## +             upViewport()
## +             ic <- ic+1
## +         }
## +     }
## +     upViewport()
## +     invisible(gl)
## + }
## 
## > ## A Chocolate Bar of colors:
## > plotCol(c("#CC8C3C", paste0("chocolate", 2:4),
## +           paste0("darkorange", c("",1:2)), paste0("darkgoldenrod", 1:2),
## +           "orange", "orange1", "sandybrown", "tan1", "tan2"),
## +         nrow=2)

## 
## > ##' Find close R colors() to a given color {original by Marius Hofert)
## > ##' using Euclidean norm in (HSV / RGB / ...) color space
## > nearRcolor <- function(rgb, cSpace = c("hsv", "rgb255", "Luv", "Lab"),
## +                        dist = switch(cSpace, "hsv" = 0.10, "rgb255" = 30,
## +                        "Luv" = 15, "Lab" = 12))
## + {
## +     if(is.character(rgb)) rgb <- col2rgb(rgb)
## +     stopifnot(length(rgb <- as.vector(rgb)) == 3)
## +     Rcol <- col2rgb(.cc <- colors())
## +     uniqC <- !duplicated(t(Rcol)) # gray9 == grey9 (etc)
## +     Rcol <- Rcol[, uniqC] ; .cc <- .cc[uniqC]
## +     cSpace <- match.arg(cSpace)
## +     convRGB2 <- function(Rgb, to)
## +         t(convertColor(t(Rgb), from="sRGB", to=to, scale.in=255))
## +     ## the transformation,  rgb{0..255} --> cSpace :
## +     TransF <- switch(cSpace,
## +                      "rgb255" = identity,
## +                      "hsv" = rgb2hsv,
## +                      "Luv" = function(RGB) convRGB2(RGB, "Luv"),
## +                      "Lab" = function(RGB) convRGB2(RGB, "Lab"))
## +     d <- sqrt(colSums((TransF(Rcol) - as.vector(TransF(rgb)))^2))
## +     iS <- sort.list(d[near <- d <= dist])# sorted: closest first
## +     setNames(.cc[near][iS], format(zapsmall(d[near][iS]), digits=3))
## + }
## 
## > nearRcolor(col2rgb("tan2"), "rgb")
##          0.0         21.1         25.8         29.5 
##       "tan2"       "tan1" "sandybrown"    "sienna1" 
## 
## > nearRcolor(col2rgb("tan2"), "hsv")
##       0.0000       0.0410       0.0618       0.0638       0.0667       0.0766 
##       "tan2"    "sienna2"     "coral2"    "tomato2"       "tan1"      "coral" 
##       0.0778       0.0900       0.0912       0.0918 
##    "sienna1" "sandybrown"     "coral1"     "tomato" 
## 
## > nearRcolor(col2rgb("tan2"), "Luv")
##         0.00         7.42         7.48        12.41        13.69 
##       "tan2"       "tan1" "sandybrown"    "orange3"    "orange2" 
## 
## > nearRcolor(col2rgb("tan2"), "Lab")
##         0.00         5.56         8.08        11.31 
##       "tan2"       "tan1" "sandybrown"       "peru" 
## 
## > nearRcolor("#334455")
##          0.0867 
## "darkslategray" 
## 
## > ## Now, consider choosing a color by looking in the
## > ## neighborhood of one you know :
## > 
## > plotCol(nearRcolor("deepskyblue", "rgb", dist=50))

## 
## > plotCol(nearRcolor("deepskyblue", dist=.1))

## 
## > plotCol(nearRcolor("tomato", "rgb", dist= 50), nrow=3)

## 
## > plotCol(nearRcolor("tomato", "hsv", dist=.12), nrow=3)

## 
## > plotCol(nearRcolor("tomato", "Luv", dist= 25), nrow=3)

## 
## > plotCol(nearRcolor("tomato", "Lab", dist= 18), nrow=3)

# Democrat Blue and Republican Red
# NOTE(review): the vector is unnamed, so scale_color_manual() assigns the
# colors in order to the values of `partywinner16` -- presumably Democrat
# first, then Republican; confirm against the data.
party_colors <- c("#2E74C0", "#CB454A")

# Background layer: only the counties that did NOT flip.
# The condition must be written with `==`. With a single `=`, subset() treats
# `flipped` as an unused named argument, applies no filter, and returns every
# row -- so the flipped counties were also being drawn in gray underneath.
p0 <- ggplot(
  data = subset(county_data, flipped == "No"),
  mapping = aes(x = pop, y = black / 100)
)

# Faint gray points on a log10 x axis with comma-formatted labels.
p1 <- p0 +
  geom_point(alpha = 0.15, color = "gray50") +
  scale_x_log10(labels = scales::comma)

p1

# Overlay the counties that flipped, colored by the value of `partywinner16`.
p2 <- p1 +
  geom_point(
    data = subset(county_data, flipped == "Yes"),
    mapping = aes(x = pop, y = black / 100, color = partywinner16)
  ) +
  scale_color_manual(values = party_colors)

p2

# `black` is divided by 100 above, so the y axis is formatted as a percentage.
p3 <- p2 +
  scale_y_continuous(labels = scales::percent) +
  labs(
    color = "County flipped to ...",
    x = "County Population (log scale)",
    y = "Percent Black Population",
    title = "Flipped counties, 2016",
    caption = "Counties in gray did not flip."
  )

p3

Question: What does gather() do?

# Arrange p1 above p2 in two rows: the first row gets 0.75 of the relative
# height and the second 0.25, with the panels aligned vertically.
cowplot::plot_grid(
  p1, p2,
  nrow = 2,
  rel_heights = c(0.75, 0.25),
  align = "v"
)

library(viridis)

# Display labels for the discrete axis, keyed by the name they replace.
f_labs <- c(
  Borrowers = "Percent of\nall Borrowers",
  Balances = "Percent of\nall Balances"
)

# Text used in the plot annotations.
# NOTE(review): p_xlab is defined here but not referenced in the visible code.
p_xlab <- "Amount Owed, in thousands of Dollars"
p_title <- "Outstanding Student Loans"
p_subtitle <- "44 million borrowers owe a total of $1.3 trillion"
p_caption <- "Source: FRB NY"


# One bar per `type`, with pct / 100 as the bar length, filled by Debtrc.
p <- ggplot(studebt, aes(y = pct / 100, x = type, fill = Debtrc))

p +
  geom_bar(stat = "identity", color = "gray80") +
  scale_x_discrete(labels = as_labeller(f_labs)) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_viridis(discrete = TRUE) +
  # Single-row legend in reversed order, title above and labels below the keys.
  guides(
    fill = guide_legend(
      reverse = TRUE,
      title.position = "top",
      label.position = "bottom",
      keywidth = 3,
      nrow = 1
    )
  ) +
  labs(
    x = NULL,
    y = NULL,
    fill = "Amount Owed, in thousands of dollars",
    caption = p_caption,
    title = p_title,
    subtitle = p_subtitle
  ) +
  theme(
    legend.position = "top",
    axis.text.y = element_text(face = "bold", hjust = 1, size = 12),
    axis.ticks.length = unit(0, "cm"),
    panel.grid.major.y = element_blank()
  ) +
  # Flip the coordinates so the bars run horizontally.
  coord_flip()

Chapter 8.6 Where to Go Next